{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from tqdm import tqdm\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw CoNLL lines for the split used for training below (test.conll.txt).\n",
    "with open('test.conll.txt') as train_handle:\n",
    "    corpus = train_handle.read().split('\\n')\n",
    "\n",
    "# Raw CoNLL lines for the split used for evaluation below (dev.conll.txt).\n",
    "with open('dev.conll.txt') as dev_handle:\n",
    "    corpus_test = dev_handle.read().split('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "word2idx = {'PAD': 0,'NUM':1,'UNK':2}\n",
    "tag2idx = {'PAD': 0}\n",
    "char2idx = {'PAD': 0}\n",
    "word_idx = 3\n",
    "tag_idx = 1\n",
    "char_idx = 1\n",
    "\n",
    "def process_corpus(corpus, until = None):\n",
    "    global word2idx, tag2idx, char2idx, word_idx, tag_idx, char_idx\n",
    "    words, depends, labels = [], [], []\n",
    "    temp_word, temp_depend, temp_label = [], [], []\n",
    "    for sentence in corpus:\n",
    "        if len(sentence):\n",
    "            sentence = sentence.split('\\t')\n",
    "            for c in sentence[1]:\n",
    "                if c not in char2idx:\n",
    "                    char2idx[c] = char_idx\n",
    "                    char_idx += 1\n",
    "            if sentence[7] not in tag2idx:\n",
    "                tag2idx[sentence[7]] = tag_idx\n",
    "                tag_idx += 1\n",
    "            if sentence[1] not in word2idx:\n",
    "                word2idx[sentence[1]] = word_idx\n",
    "                word_idx += 1\n",
    "            temp_word.append(word2idx[sentence[1]])\n",
    "            temp_depend.append(int(sentence[6]))\n",
    "            temp_label.append(tag2idx[sentence[7]])\n",
    "        else:\n",
    "            words.append(temp_word)\n",
    "            depends.append(temp_depend)\n",
    "            labels.append(temp_label)\n",
    "            temp_word = []\n",
    "            temp_depend = []\n",
    "            temp_label = []\n",
    "    return words[:-1], depends[:-1], labels[:-1]\n",
    "        \n",
    "# The training split is indexed first, so its vocabulary ids are assigned before the evaluation split's.\n",
    "(words, depends, labels) = process_corpus(corpus)\n",
    "(words_test, depends_test, labels_test) = process_corpus(corpus_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.preprocessing.sequence import pad_sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Post-pad the three parallel sequence sets of each split into rectangular arrays.\n",
    "words, depends, labels = [\n",
    "    pad_sequences(sequences, padding='post')\n",
    "    for sequences in (words, depends, labels)\n",
    "]\n",
    "\n",
    "words_test, depends_test, labels_test = [\n",
    "    pad_sequences(sequences, padding='post')\n",
    "    for sequences in (words_test, depends_test, labels_test)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reverse lookups (id -> string) for the word and dependency-label vocabularies.\n",
    "idx2word = {index: token for token, index in word2idx.items()}\n",
    "idx2tag = {index: label for label, index in tag2idx.items()}\n",
    "\n",
    "# Aliases used by the model and the training loop: X = word ids, Y = label ids.\n",
    "train_X, train_Y, train_depends = words, labels, depends\n",
    "test_X, test_Y, test_depends = words_test, labels_test, depends_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Both splits are fed through placeholders of one fixed width, so re-pad them to a common length.\n",
    "maxlen = max(train_X.shape[1], test_X.shape[1])\n",
    "\n",
    "train_X, train_Y, train_depends = [\n",
    "    pad_sequences(array, padding='post', maxlen=maxlen)\n",
    "    for array in (train_X, train_Y, train_depends)\n",
    "]\n",
    "\n",
    "test_X, test_Y, test_depends = [\n",
    "    pad_sequences(array, padding='post', maxlen=maxlen)\n",
    "    for array in (test_X, test_Y, test_depends)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Attention:\n",
    "    \"\"\"Additive attention over the timesteps of a rank-3 tensor.\n",
    "\n",
    "    Each timestep is scored as v . tanh(Dense([hidden; encoder_output]))\n",
    "    and the scores are normalised with a softmax over the time axis.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,hidden_size):\n",
    "        \"\"\"Create the projection layer and the scoring vector.\n",
    "\n",
    "        Args:\n",
    "            hidden_size: width of the dense projection and length of v.\n",
    "        \"\"\"\n",
    "        self.hidden_size = hidden_size\n",
    "        self.dense_layer = tf.layers.Dense(hidden_size)\n",
    "        # v has to be a Variable: a bare tf.random_normal tensor is re-sampled\n",
    "        # on every session run and is never updated by the optimizer.\n",
    "        self.v = tf.Variable(\n",
    "            tf.random_normal([hidden_size],mean=0,stddev=1/np.sqrt(hidden_size))\n",
    "        )\n",
    "        \n",
    "    def score(self, hidden_tensor, encoder_outputs):\n",
    "        \"\"\"Return unnormalised energies of shape [batch, seq].\n",
    "\n",
    "        Both arguments are concatenated on axis 2, so they must be rank 3\n",
    "        and agree on their first two dimensions.\n",
    "        \"\"\"\n",
    "        # [batch, seq, hidden_size]\n",
    "        energy = tf.nn.tanh(self.dense_layer(tf.concat([hidden_tensor,encoder_outputs],2)))\n",
    "        # [batch, hidden_size, seq]\n",
    "        energy = tf.transpose(energy,[0,2,1])\n",
    "        batch_size = tf.shape(encoder_outputs)[0]\n",
    "        # Repeat v for every batch row: [batch, 1, hidden_size]\n",
    "        v = tf.expand_dims(tf.tile(tf.expand_dims(self.v,0),[batch_size,1]),1)\n",
    "        # [batch, 1, hidden_size] x [batch, hidden_size, seq] -> [batch, 1, seq]\n",
    "        energy = tf.matmul(v,energy)\n",
    "        return tf.squeeze(energy,1)\n",
    "    \n",
    "    def __call__(self, hidden, encoder_outputs):\n",
    "        \"\"\"Return softmax attention weights of shape [batch, 1, seq].\n",
    "\n",
    "        Args:\n",
    "            hidden: rank-2 tensor; it is repeated along a new time axis.\n",
    "            encoder_outputs: rank-3 tensor whose axis 1 is the time axis.\n",
    "        \"\"\"\n",
    "        seq_len = tf.shape(encoder_outputs)[1]\n",
    "        # Broadcast the summary vector to every timestep before scoring.\n",
    "        H = tf.tile(tf.expand_dims(hidden, 1),[1,seq_len,1])\n",
    "        attn_energies = self.score(H,encoder_outputs)\n",
    "        return tf.expand_dims(tf.nn.softmax(attn_energies),1)\n",
    "\n",
    "class Model:\n",
    "    \"\"\"Dilated gated-convolution tagger with two CRF output heads.\n",
    "\n",
    "    One head predicts the dependency label of every token, the other its\n",
    "    head index. Both are trained jointly by summing their mean negative\n",
    "    CRF log-likelihoods. The number of dependency labels is read from the\n",
    "    module-level idx2tag.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        dict_size,\n",
    "        size_layers,\n",
    "        learning_rate,\n",
    "        maxlen,\n",
    "        num_blocks = 3,\n",
    "        block_size = 128,\n",
    "    ):\n",
    "        \"\"\"Build the graph.\n",
    "\n",
    "        Args:\n",
    "            dict_size: number of rows of the word-embedding matrix.\n",
    "            size_layers: embedding width, also the attention hidden size.\n",
    "            learning_rate: Adam learning rate.\n",
    "            maxlen: fixed width of the three input placeholders.\n",
    "            num_blocks: how many times the dilation stack is repeated.\n",
    "            block_size: channel count of the residual stream.\n",
    "        \"\"\"\n",
    "        self.word_ids = tf.placeholder(tf.int32, shape = [None, maxlen])\n",
    "        self.labels = tf.placeholder(tf.int32, shape = [None, maxlen])\n",
    "        self.depends = tf.placeholder(tf.int32, shape = [None, maxlen])\n",
    "        embeddings = tf.Variable(tf.random_uniform([dict_size, size_layers], -1, 1))\n",
    "        embedded = tf.nn.embedding_lookup(embeddings, self.word_ids)\n",
    "        # A single Attention instance is shared by every residual block.\n",
    "        self.attention = Attention(size_layers)\n",
    "        self.maxlen = tf.shape(self.word_ids)[1]\n",
    "        # Sentence length = number of non-zero word ids (0 is the PAD id).\n",
    "        self.lengths = tf.count_nonzero(self.word_ids, 1)\n",
    "\n",
    "        def residual_block(x, size, rate, block):\n",
    "            \"\"\"Gated dilated convolution; returns (x + out, out).\"\"\"\n",
    "            with tf.variable_scope(\n",
    "                'block_%d_%d' % (block, rate), reuse = False\n",
    "            ):\n",
    "                # attn_weights is [batch, 1, seq], so conv_filter has a single\n",
    "                # timestep and broadcasts over time in the multiply below.\n",
    "                attn_weights = self.attention(tf.reduce_sum(x,axis=1), x)\n",
    "                conv_filter = tf.layers.conv1d(\n",
    "                    attn_weights,\n",
    "                    x.shape[2] // 4,\n",
    "                    kernel_size = size,\n",
    "                    strides = 1,\n",
    "                    padding = 'same',\n",
    "                    dilation_rate = rate,\n",
    "                    activation = tf.nn.tanh,\n",
    "                )\n",
    "                conv_gate = tf.layers.conv1d(\n",
    "                    x,\n",
    "                    x.shape[2] // 4,\n",
    "                    kernel_size = size,\n",
    "                    strides = 1,\n",
    "                    padding = 'same',\n",
    "                    dilation_rate = rate,\n",
    "                    activation = tf.nn.sigmoid,\n",
    "                )\n",
    "                out = tf.multiply(conv_filter, conv_gate)\n",
    "                out = tf.layers.conv1d(\n",
    "                    out,\n",
    "                    block_size,\n",
    "                    kernel_size = 1,\n",
    "                    strides = 1,\n",
    "                    padding = 'same',\n",
    "                    activation = tf.nn.tanh,\n",
    "                )\n",
    "                return tf.add(x, out), out\n",
    "\n",
    "        forward = tf.layers.conv1d(\n",
    "            embedded, block_size, kernel_size = 1, strides = 1, padding = 'SAME'\n",
    "        )\n",
    "        # Running sum of the skip outputs of every block; feeds both heads.\n",
    "        zeros = tf.zeros_like(forward)\n",
    "        for i in range(num_blocks):\n",
    "            for r in [1, 2, 4, 8, 16]:\n",
    "                forward, s = residual_block(\n",
    "                    forward, size = 7, rate = r, block = i\n",
    "                )\n",
    "                zeros = tf.add(zeros, s)\n",
    "        logits = tf.layers.conv1d(\n",
    "            zeros, len(idx2tag), kernel_size = 1, strides = 1, padding = 'SAME'\n",
    "        )\n",
    "        # Head indices run from 0 (root) to the sentence length inclusive,\n",
    "        # so a sentence of maxlen tokens needs maxlen + 1 classes; with only\n",
    "        # maxlen classes the label maxlen would fall outside the logits.\n",
    "        logits_depends = tf.layers.conv1d(\n",
    "            zeros, maxlen + 1, kernel_size = 1, strides = 1, padding = 'SAME'\n",
    "        )\n",
    "        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(\n",
    "            logits, self.labels, self.lengths\n",
    "        )\n",
    "        # Separate scope so the second CRF gets its own transition matrix.\n",
    "        with tf.variable_scope(\"depends\"):\n",
    "            log_likelihood_depends, transition_params_depends = tf.contrib.crf.crf_log_likelihood(\n",
    "                logits_depends, self.depends, self.lengths\n",
    "            )\n",
    "        self.cost = tf.reduce_mean(-log_likelihood) + tf.reduce_mean(-log_likelihood_depends)\n",
    "        self.optimizer = tf.train.AdamOptimizer(\n",
    "            learning_rate = learning_rate\n",
    "        ).minimize(self.cost)\n",
    "        \n",
    "        # True for real tokens, False for padding positions.\n",
    "        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)\n",
    "        \n",
    "        self.tags_seq, _ = tf.contrib.crf.crf_decode(\n",
    "            logits, transition_params, self.lengths\n",
    "        )\n",
    "        self.tags_seq_depends, _ = tf.contrib.crf.crf_decode(\n",
    "            logits_depends, transition_params_depends, self.lengths\n",
    "        )\n",
    "\n",
    "        # Token-level accuracy of the dependency-label head (padding excluded).\n",
    "        self.prediction_tags = tf.boolean_mask(self.tags_seq, mask)\n",
    "        gold_tags = tf.boolean_mask(self.labels, mask)\n",
    "        self.accuracy = tf.reduce_mean(\n",
    "            tf.cast(tf.equal(self.prediction_tags, gold_tags), tf.float32)\n",
    "        )\n",
    "        \n",
    "        # Token-level accuracy of the head-index head (padding excluded).\n",
    "        self.prediction_depends = tf.boolean_mask(self.tags_seq_depends, mask)\n",
    "        gold_depends = tf.boolean_mask(self.depends, mask)\n",
    "        self.accuracy_depends = tf.reduce_mean(\n",
    "            tf.cast(tf.equal(self.prediction_depends, gold_depends), tf.float32)\n",
    "        )\n",
    "        # Backward compatibility: this attribute used to be assigned twice and\n",
    "        # ended up holding the head-index predictions.\n",
    "        self.prediction = self.prediction_depends"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:100: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n",
      "  \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n"
     ]
    }
   ],
   "source": [
    "# Hyperparameters. `dropout` is defined here but is not passed to Model.\n",
    "dim = 256\n",
    "dropout = 1\n",
    "learning_rate = 1e-3\n",
    "batch_size = 32\n",
    "\n",
    "# Start from an empty graph so re-running this cell does not stack a second model on the old one.\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "\n",
    "model = Model(\n",
    "    dict_size = len(word2idx),\n",
    "    size_layers = dim,\n",
    "    learning_rate = learning_rate,\n",
    "    maxlen = maxlen,\n",
    ")\n",
    "sess.run(tf.global_variables_initializer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:17<00:00,  3.64it/s, accuracy=0.497, accuracy_depends=0.0582, cost=96.4]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 10.84it/s, accuracy=0.518, accuracy_depends=0.0636, cost=151]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 22.717660427093506\n",
      "epoch: 0, training loss: 153.183009, training acc: 0.294935, training depends: 0.048782, valid loss: 129.335049, valid acc: 0.491078, valid depends: 0.077788\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.47it/s, accuracy=0.634, accuracy_depends=0.171, cost=71.4]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.90it/s, accuracy=0.755, accuracy_depends=0.145, cost=113] \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.58572268486023\n",
      "epoch: 1, training loss: 110.329521, training acc: 0.580832, training depends: 0.113730, valid loss: 100.057401, valid acc: 0.659700, valid depends: 0.141831\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.48it/s, accuracy=0.726, accuracy_depends=0.226, cost=59.4]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.67it/s, accuracy=0.818, accuracy_depends=0.127, cost=97.3]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s, accuracy=0.739, accuracy_depends=0.168, cost=79.6]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.490660667419434\n",
      "epoch: 2, training loss: 88.406940, training acc: 0.712373, training depends: 0.173413, valid loss: 88.115075, valid acc: 0.722415, valid depends: 0.169411\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.35it/s, accuracy=0.805, accuracy_depends=0.281, cost=50.7]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.75it/s, accuracy=0.8, accuracy_depends=0.155, cost=90.2]  \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.47122573852539\n",
      "epoch: 3, training loss: 76.711710, training acc: 0.783817, training depends: 0.216270, valid loss: 81.675866, valid acc: 0.755237, valid depends: 0.195363\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.43it/s, accuracy=0.873, accuracy_depends=0.349, cost=43.6]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.81it/s, accuracy=0.818, accuracy_depends=0.182, cost=85.8]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.655714988708496\n",
      "epoch: 4, training loss: 68.472157, training acc: 0.831365, training depends: 0.259200, valid loss: 77.894161, valid acc: 0.775189, valid depends: 0.222546\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=0.938, accuracy_depends=0.38, cost=37.7] \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.73it/s, accuracy=0.827, accuracy_depends=0.245, cost=83.4]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.527581453323364\n",
      "epoch: 5, training loss: 61.882243, training acc: 0.866012, training depends: 0.299192, valid loss: 75.928109, valid acc: 0.782752, valid depends: 0.242099\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.48it/s, accuracy=0.962, accuracy_depends=0.469, cost=32.9]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.72it/s, accuracy=0.827, accuracy_depends=0.245, cost=82.1]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.484257698059082\n",
      "epoch: 6, training loss: 56.255667, training acc: 0.893967, training depends: 0.341305, valid loss: 75.448544, valid acc: 0.786553, valid depends: 0.254019\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.38it/s, accuracy=0.979, accuracy_depends=0.534, cost=28.7]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.78it/s, accuracy=0.827, accuracy_depends=0.236, cost=82.8]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.61243438720703\n",
      "epoch: 7, training loss: 51.263502, training acc: 0.917225, training depends: 0.383208, valid loss: 76.300179, valid acc: 0.783883, valid depends: 0.259615\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=0.986, accuracy_depends=0.579, cost=25.1]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 13.08it/s, accuracy=0.8, accuracy_depends=0.218, cost=86.5]  \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.4697368144989\n",
      "epoch: 8, training loss: 46.744222, training acc: 0.934576, training depends: 0.428258, valid loss: 77.713942, valid acc: 0.784674, valid depends: 0.261951\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.40it/s, accuracy=0.986, accuracy_depends=0.644, cost=21.8]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.54it/s, accuracy=0.836, accuracy_depends=0.209, cost=94.3]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.674675703048706\n",
      "epoch: 9, training loss: 42.561942, training acc: 0.949874, training depends: 0.470683, valid loss: 81.004866, valid acc: 0.780429, valid depends: 0.263705\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.46it/s, accuracy=0.986, accuracy_depends=0.705, cost=19.1]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.57it/s, accuracy=0.855, accuracy_depends=0.264, cost=89.2]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.510538816452026\n",
      "epoch: 10, training loss: 38.979279, training acc: 0.960487, training depends: 0.507692, valid loss: 79.328121, valid acc: 0.784113, valid depends: 0.287815\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.51it/s, accuracy=0.993, accuracy_depends=0.777, cost=15.1]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.50it/s, accuracy=0.836, accuracy_depends=0.3, cost=85.7]  \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.53708791732788\n",
      "epoch: 11, training loss: 35.629965, training acc: 0.968517, training depends: 0.553509, valid loss: 81.083164, valid acc: 0.784796, valid depends: 0.287922\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.47it/s, accuracy=0.997, accuracy_depends=0.846, cost=13.3]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.81it/s, accuracy=0.818, accuracy_depends=0.291, cost=96.6]\n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.565435647964478\n",
      "epoch: 12, training loss: 32.694344, training acc: 0.974347, training depends: 0.593964, valid loss: 84.288952, valid acc: 0.781696, valid depends: 0.291779\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.49it/s, accuracy=1, accuracy_depends=0.904, cost=10.5]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.77it/s, accuracy=0.8, accuracy_depends=0.309, cost=97]    \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.49126100540161\n",
      "epoch: 13, training loss: 28.929201, training acc: 0.978603, training depends: 0.648120, valid loss: 87.836118, valid acc: 0.779976, valid depends: 0.289016\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.50it/s, accuracy=1, accuracy_depends=0.914, cost=9.43]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.41it/s, accuracy=0.809, accuracy_depends=0.364, cost=94]  \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.524020671844482\n",
      "epoch: 14, training loss: 26.025858, training acc: 0.981591, training depends: 0.689773, valid loss: 89.180642, valid acc: 0.776449, valid depends: 0.298843\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.26it/s, accuracy=1, accuracy_depends=0.918, cost=7.58]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 11.67it/s, accuracy=0.8, accuracy_depends=0.318, cost=99.5]  \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.600938320159912\n",
      "epoch: 15, training loss: 23.192569, training acc: 0.987144, training depends: 0.726383, valid loss: 94.854855, valid acc: 0.770395, valid depends: 0.286692\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.36it/s, accuracy=0.997, accuracy_depends=0.955, cost=5.65]\n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.75it/s, accuracy=0.809, accuracy_depends=0.355, cost=102] \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.59098505973816\n",
      "epoch: 16, training loss: 20.472838, training acc: 0.992866, training depends: 0.764563, valid loss: 97.303479, valid acc: 0.769268, valid depends: 0.287900\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.41it/s, accuracy=1, accuracy_depends=0.986, cost=4.23]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.59it/s, accuracy=0.809, accuracy_depends=0.336, cost=104] \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.69632053375244\n",
      "epoch: 17, training loss: 17.953982, training acc: 0.995921, training depends: 0.801940, valid loss: 100.876137, valid acc: 0.769323, valid depends: 0.283341\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:16<00:00,  5.31it/s, accuracy=1, accuracy_depends=0.983, cost=3.82]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.80it/s, accuracy=0.809, accuracy_depends=0.336, cost=102] \n",
      "train minibatch loop:   0%|          | 0/76 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.720519304275513\n",
      "epoch: 18, training loss: 15.907509, training acc: 0.998263, training depends: 0.829425, valid loss: 103.454220, valid acc: 0.772234, valid depends: 0.287102\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "train minibatch loop: 100%|██████████| 76/76 [00:15<00:00,  5.40it/s, accuracy=1, accuracy_depends=0.973, cost=3.46]    \n",
      "test minibatch loop: 100%|██████████| 54/54 [00:04<00:00, 12.69it/s, accuracy=0.845, accuracy_depends=0.409, cost=101] "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken: 20.618144035339355\n",
      "epoch: 19, training loss: 14.017608, training acc: 0.999365, training depends: 0.851850, valid loss: 108.007175, valid acc: 0.771574, valid depends: 0.283096\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "# NOTE: batch_x / batch_y / batch_depends are deliberately left at module\n",
    "# level; the cells below inspect the last evaluated batch through them.\n",
    "for e in range(20):\n",
    "    lasttime = time.time()\n",
    "    train_acc, train_loss, test_acc, test_loss, train_acc_depends, test_acc_depends = 0, 0, 0, 0, 0, 0\n",
    "\n",
    "    # Training pass: one optimizer step per minibatch.\n",
    "    train_starts = range(0, len(train_X), batch_size)\n",
    "    pbar = tqdm(train_starts, desc = 'train minibatch loop')\n",
    "    for i in pbar:\n",
    "        # Slicing clamps at the end of the array, so the last batch may be shorter.\n",
    "        batch_x = train_X[i : i + batch_size]\n",
    "        batch_y = train_Y[i : i + batch_size]\n",
    "        batch_depends = train_depends[i : i + batch_size]\n",
    "        acc_depends, acc, cost, _ = sess.run(\n",
    "            [model.accuracy_depends, model.accuracy, model.cost, model.optimizer],\n",
    "            feed_dict = {\n",
    "                model.word_ids: batch_x,\n",
    "                model.labels: batch_y,\n",
    "                model.depends: batch_depends\n",
    "            },\n",
    "        )\n",
    "        assert not np.isnan(cost)\n",
    "        train_loss += cost\n",
    "        train_acc += acc\n",
    "        train_acc_depends += acc_depends\n",
    "        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)\n",
    "        \n",
    "    # Evaluation pass: same fetches without the optimizer, so no weights change.\n",
    "    test_starts = range(0, len(test_X), batch_size)\n",
    "    pbar = tqdm(test_starts, desc = 'test minibatch loop')\n",
    "    for i in pbar:\n",
    "        batch_x = test_X[i : i + batch_size]\n",
    "        batch_y = test_Y[i : i + batch_size]\n",
    "        batch_depends = test_depends[i : i + batch_size]\n",
    "        acc_depends, acc, cost = sess.run(\n",
    "            [model.accuracy_depends, model.accuracy, model.cost],\n",
    "            feed_dict = {\n",
    "                model.word_ids: batch_x,\n",
    "                model.labels: batch_y,\n",
    "                model.depends: batch_depends\n",
    "            },\n",
    "        )\n",
    "        assert not np.isnan(cost)\n",
    "        test_loss += cost\n",
    "        test_acc += acc\n",
    "        test_acc_depends += acc_depends\n",
    "        pbar.set_postfix(cost = cost, accuracy = acc, accuracy_depends = acc_depends)\n",
    "    \n",
    "    # Average over the number of batches actually run. Dividing by\n",
    "    # len(X) / batch_size (a fraction whenever the last batch is partial)\n",
    "    # inflated every reported metric.\n",
    "    num_train_batches = max(len(train_starts), 1)\n",
    "    num_test_batches = max(len(test_starts), 1)\n",
    "    train_loss /= num_train_batches\n",
    "    train_acc /= num_train_batches\n",
    "    train_acc_depends /= num_train_batches\n",
    "    test_loss /= num_test_batches\n",
    "    test_acc /= num_test_batches\n",
    "    test_acc_depends /= num_test_batches\n",
    "\n",
    "    print('time taken:', time.time() - lasttime)\n",
    "    print(\n",
    "        'epoch: %d, training loss: %f, training acc: %f, training depends: %f, valid loss: %f, valid acc: %f, valid depends: %f\\n'\n",
    "        % (e, train_loss, train_acc, train_acc_depends, test_loss, test_acc, test_acc_depends)\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode labels and head indices for the first sentence of the last batch left over from the evaluation loop.\n",
    "seq, deps = sess.run(\n",
    "    [model.tags_seq, model.tags_seq_depends],\n",
    "    feed_dict = {model.word_ids: batch_x[:1]},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the batch dimension (only one sentence was decoded).\n",
    "seq, deps = seq[0], deps[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([18, 19,  2,  6,  3,  7, 16, 18, 23, 20, 19,  2], dtype=int32)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mask by real tokens (non-PAD word ids) instead of by the prediction itself, so a predicted PAD tag stays visible.\n",
    "seq[batch_x[0] > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([18, 19,  2,  6,  3,  7, 16, 18, 23, 20, 19,  2], dtype=int32)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gold labels of the same sentence, masked by real tokens (non-PAD word ids) rather than by the prediction.\n",
    "batch_y[0][batch_x[0] > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2, 14, 11,  5,  6,  0,  3, 11, 11, 11,  6,  6], dtype=int32)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predicted head indices (0 = root), masked by real tokens (non-PAD word ids) rather than by the label prediction.\n",
    "deps[batch_x[0] > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2,  6,  6,  5,  6,  0,  6, 11, 11, 11,  6,  6], dtype=int32)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gold head indices of the same sentence, masked by real tokens (non-PAD word ids) rather than by the label prediction.\n",
    "batch_depends[0][batch_x[0] > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
